import sqlite3
from sqlite3.dbapi2 import Cursor
from sqlite3 import Error
import pandas as pd
from pathlib import Path
from itertools import combinations
from itertools import permutations
import numpy as np
import datetime
import plotly.graph_objects as go
import plotly.express as px
pd.options.plotting.backend = "plotly"
import plotly.offline as pyo
pyo.init_notebook_mode()
DATABASE = 'correlation.sqlite'
DATABASE_PATH = str(Path(".").resolve().parent.parent) + '/' + DATABASE
def create_connection(db_file):
""" create a database connection to a SQLite database """
conn = None
try:
conn = sqlite3.connect(db_file)
cursor = conn.cursor()
cursor.execute("CREATE TABLE if not exists Prices (id INTEGER PRIMARY KEY, symbol TEXT, price REAL, created_at TEXT)")
print(sqlite3.version)
except Error as e:
print(e)
finally:
if conn:
conn.close()
def get_prices(conn, currency):
cursor = conn.cursor()
cursor.execute("SELECT * FROM Prices WHERE symbol = '" + currency + "' ORDER BY id DESC ")
result = cursor.fetchall()
return result
conn = sqlite3.connect(DATABASE_PATH)
#currency_pairs = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'DASHUSDT', 'XLMUSDT', 'LTCBTC', 'ETHBTC', 'GASBTC', 'WTCBTC', 'TROYUSDT']
#currency_pairs = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'ADAUSDT', 'SHIBUSDT', 'DOGEUSDT', 'DOTUSDT']
#currency_pairs = ['BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'ETCUSDT']
currencies = ['BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'ETCUSDT']
df = pd.DataFrame()
for currency in currencies:
results = get_prices(conn, currency)
df_currency = pd.DataFrame(results, columns=['Id', 'Symbol', currency , 'Created_at'])
df_currency = df_currency[["Created_at", currency]]
#print(df_currency)
if(df.empty):
df = df_currency
else:
#df.merge(df_currency, left_on='Created_at', right_on='Created_at')
df = df.merge(df_currency, how='inner', on='Created_at')
#df.set_index('Created_at',inplace=True)
df = df[df['Created_at'] < '2022-08-17T23:43:00.000']
df
| Created_at | BTCUSDT | ETHUSDT | SOLUSDT | ETCUSDT | |
|---|---|---|---|---|---|
| 1 | 2022-08-17T23:40:05.000Z | 23339.96 | 1834.36 | 40.48 | 40.27 |
| 2 | 2022-08-17T23:40:03.000Z | 23340.90 | 1834.36 | 40.48 | 40.28 |
| 3 | 2022-08-17T23:40:01.000Z | 23340.02 | 1834.35 | 40.48 | 40.28 |
| 4 | 2022-08-17T23:40:00.000Z | 23342.91 | 1834.36 | 40.48 | 40.28 |
| 5 | 2022-08-17T23:39:59.000Z | 23337.38 | 1834.01 | 40.47 | 40.27 |
| ... | ... | ... | ... | ... | ... |
| 447 | 2022-08-17T23:29:32.000Z | 23305.68 | 1831.10 | 40.34 | 39.98 |
| 448 | 2022-08-17T23:29:31.000Z | 23306.12 | 1831.11 | 40.33 | 39.98 |
| 449 | 2022-08-17T23:29:29.000Z | 23309.18 | 1831.49 | 40.33 | 39.97 |
| 450 | 2022-08-17T23:29:28.000Z | 23308.23 | 1831.49 | 40.33 | 39.97 |
| 451 | 2022-08-17T23:29:27.000Z | 23308.80 | 1831.45 | 40.32 | 39.97 |
451 rows × 5 columns
df_scale = df.copy()
for currency in currencies:
maxvalue = df_scale[currency].max()
minvalue = df_scale[currency].min()
difmaxmin = maxvalue - minvalue
df_scale[currency] = (df_scale[currency] - minvalue) / difmaxmin
df_scale
df_scale
| Created_at | BTCUSDT | ETHUSDT | SOLUSDT | ETCUSDT | |
|---|---|---|---|---|---|
| 1 | 2022-08-17T23:40:05.000Z | 0.652198 | 0.558528 | 0.64 | 0.714286 |
| 2 | 2022-08-17T23:40:03.000Z | 0.669861 | 0.558528 | 0.64 | 0.738095 |
| 3 | 2022-08-17T23:40:01.000Z | 0.653326 | 0.556856 | 0.64 | 0.738095 |
| 4 | 2022-08-17T23:40:00.000Z | 0.707629 | 0.558528 | 0.64 | 0.738095 |
| 5 | 2022-08-17T23:39:59.000Z | 0.603720 | 0.500000 | 0.60 | 0.714286 |
| ... | ... | ... | ... | ... | ... |
| 447 | 2022-08-17T23:29:32.000Z | 0.008080 | 0.013378 | 0.08 | 0.023810 |
| 448 | 2022-08-17T23:29:31.000Z | 0.016347 | 0.015050 | 0.04 | 0.023810 |
| 449 | 2022-08-17T23:29:29.000Z | 0.073844 | 0.078595 | 0.04 | 0.000000 |
| 450 | 2022-08-17T23:29:28.000Z | 0.055994 | 0.078595 | 0.04 | 0.000000 |
| 451 | 2022-08-17T23:29:27.000Z | 0.066704 | 0.071906 | 0.00 | 0.000000 |
451 rows × 5 columns
fig = go.Figure()
pairs = ['SOLUSDT', 'ETHUSDT']
for currency in currencies:
fig.add_trace(go.Scatter(x=df_scale.Created_at, y=df_scale[currency],#,.rolling(3).mean(),
mode='lines',
name=currency))
#fig.add_trace(go.Scatter(x=df_scale.Created_at, y=df_scale['ETCUSDT'].shift(8),
# mode='lines',
# name='ETHUSDT(shift)'))
fig.show()
class CorrClass():
pass
pages = 10
windows = 15
lag = 10
currency_permutations = list(permutations(currencies, 2))
correlation_pair = currency_permutations[1]
currency_permutations
df[['BTCUSDT', 'ETHUSDT']]
| BTCUSDT | ETHUSDT | |
|---|---|---|
| 1 | 23339.96 | 1834.36 |
| 2 | 23340.90 | 1834.36 |
| 3 | 23340.02 | 1834.35 |
| 4 | 23342.91 | 1834.36 |
| 5 | 23337.38 | 1834.01 |
| ... | ... | ... |
| 447 | 23305.68 | 1831.10 |
| 448 | 23306.12 | 1831.11 |
| 449 | 23309.18 | 1831.49 |
| 450 | 23308.23 | 1831.49 |
| 451 | 23308.80 | 1831.45 |
451 rows × 2 columns
print('Starting analizing correlations: ', correlation_pair[0], '-', correlation_pair[1])
for xx in currency_permutations:
pair0 = xx[0]
pair1 = xx[1]
start_time = datetime.datetime.now()
correlations = []
correlations_list = []
correlations_list_summary = []
correlations_pages = []
for x in range(pages):
correlation_windows = []
df_copy = df.copy()
df_copy = df_copy[[pair0, pair1]]
df_copy[pair0] = df_copy[pair0].shift(-x)
for y in range(3, windows):
correlation_lags = []
for z in range(lag):
correlation = np.corrcoef(df_copy.shift(-z).head(y)[pair0]
, df_copy.shift(-z).head(y)[pair1] )
#print('Correlation P(', x ,'), L(', y, '), W(', z ,'): ', correlation[0][1])
if not np.isnan(correlation[0][1]):
c = CorrClass()
c.corr = correlation[0][1]
c.page = x
c.window = y
c.lag = z
c.currency1 = correlation_pair[0]
c.currency2 = correlation_pair[1]
correlations_list.append(c)
correlation_lags.append(correlation[0][1])
corr_array = np.asarray(correlation_lags)
stv = np.std(corr_array)
mean = np.mean(corr_array)
c = CorrClass()
c.page = x
c.window = y
c.stv = stv
c.mean = mean
c.currency1 = pair0
c.currency2 = pair1
if(abs(mean) > .90):
correlations_list_summary.append(c)
correlation_windows.append(correlation_lags)
correlations_pages.append(correlation_lags)
#print(correlations_pages)
#print(correlations_list_summary)
end_time = datetime.datetime.now()
difference = end_time - start_time
print('Finished analizing correlations: ', pair0, '-', pair1)
print('Analysis took:', difference.seconds, 'seconds')
Starting analizing correlations: BTCUSDT - SOLUSDT Finished analizing correlations: BTCUSDT - ETHUSDT Analysis took: 0 seconds Finished analizing correlations: BTCUSDT - SOLUSDT Analysis took: 0 seconds Finished analizing correlations: BTCUSDT - ETCUSDT Analysis took: 0 seconds Finished analizing correlations: ETHUSDT - BTCUSDT Analysis took: 0 seconds Finished analizing correlations: ETHUSDT - SOLUSDT Analysis took: 0 seconds Finished analizing correlations: ETHUSDT - ETCUSDT Analysis took: 0 seconds Finished analizing correlations: SOLUSDT - BTCUSDT Analysis took: 0 seconds Finished analizing correlations: SOLUSDT - ETHUSDT Analysis took: 0 seconds Finished analizing correlations: SOLUSDT - ETCUSDT Analysis took: 0 seconds Finished analizing correlations: ETCUSDT - BTCUSDT Analysis took: 0 seconds Finished analizing correlations: ETCUSDT - ETHUSDT Analysis took: 0 seconds Finished analizing correlations: ETCUSDT - SOLUSDT Analysis took: 0 seconds
for c in correlations_list_summary:
print(c.currency1 ,'-', c.currency2 ,': page (', c.page , '), window (', c.window, '), stv(', c.stv ,'), mean(', c.mean ,')')
ETCUSDT - SOLUSDT : page ( 0 ), window ( 3 ), stv( 0.0 ), mean( 1.0 ) ETCUSDT - SOLUSDT : page ( 0 ), window ( 4 ), stv( 0.0 ), mean( 1.0 ) ETCUSDT - SOLUSDT : page ( 0 ), window ( 5 ), stv( 0.167847658947264 ), mean( 0.9030931089239486 ) ETCUSDT - SOLUSDT : page ( 0 ), window ( 6 ), stv( 0.12682648404432204 ), mean( 0.9267766952966369 ) ETCUSDT - SOLUSDT : page ( 8 ), window ( 3 ), stv( 7.850462293418876e-17 ), mean( -1.0 ) ETCUSDT - SOLUSDT : page ( 8 ), window ( 4 ), stv( 0.0 ), mean( -1.0 ) ETCUSDT - SOLUSDT : page ( 8 ), window ( 5 ), stv( 7.850462293418876e-17 ), mean( -1.0 )
df_test = df_scale.copy()
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_test.Created_at, y=df_test['ETCUSDT'],#.rolling(4).mean(),
mode='lines',
name='ETCUSDT'))
fig.add_trace(go.Scatter(x=df_test.Created_at, y=df_test['SOLUSDT'],#.rolling(4).mean(),
mode='lines',
name='SOLUSDT'))
fig.add_trace(go.Scatter(x=df_test.Created_at, y=df_test['SOLUSDT'].shift(-8),
mode='lines',
name='SOLUSDT_shift8'))
fig.show()